In [24]:
# Sample words used by the sorting examples.
x = ['aardvark', 'bee', 'croco', 'duck', 'emo']
In [25]:
# want: sorted by second letter -> ['aardvark', 'bee', 'emo', 'croco', 'duck']
sorted(x, reverse=True)
Out[25]:
In [26]:
# sorted(x, key=???) is what to use when you want to sort by the second letter of each string in the list.
In [27]:
def get_second_letter(s):
    """Return the second character of ``s``.

    Raises IndexError when ``s`` has fewer than two characters.
    """
    return s[1]
In [28]:
get_second_letter("cheese")
Out[28]:
In [29]:
sorted(x, key=get_second_letter)
Out[29]:
In [30]:
#normal function
def get_second_letter(s):
    """Return the character at index 1 of ``s`` (IndexError if ``s`` is shorter)."""
    return s[1]
In [31]:
get_second_letter = lambda s: s[1]
In [32]:
get_second_letter("hello")
Out[32]:
In [35]:
sorted(x, key = lambda s: s[1])
Out[35]:
In [19]:
# [p['name'] for p in sorted(planets, key=lambda x: x['moons'])]
In [20]:
# def get_moon_count(d):
#     return d['moons']
# sorted(planets, key=get_moon_count)
In [21]:
#written in SQL:
#Select name from planet order by moons
In [39]:
t = [5]
In [40]:
# Print the square of every element of t.
for item in t:
    print(item * item)
In [41]:
t.append(30)
In [42]:
carefree_list = [5, 33, 32, 66, 44]
In [43]:
carefree_list[1] = 'Mr Fluffypants'
In [45]:
carefree_list
Out[45]:
In [48]:
t[1] = 'Mr. Fluffypants'
In [49]:
t
Out[49]:
In [50]:
# immutable data type
# one benefit is exactly that: it can't be changed
# another benefit is that tuples are memory-efficient
In [51]:
import sys
In [52]:
hello = [1, 2, 3]
In [54]:
sys.getsizeof(hello)
Out[54]:
In [59]:
test = "one 1 two 2 three 3 four 4 five 5"
In [63]:
import re
re.findall(r"\w+ \d", test)
Out[63]:
In [64]:
# With two capture groups, re.findall returns one (word, digit) tuple per match.
for item in re.findall(r"(\w+) (\d)", test):
    # NOTE(review): the original loop body was missing; printing both groups is a guess.
    print(item[0], item[1])
In [65]:
# Read the whole file; the with-block closes the handle as soon as the read is done.
with open("enronsubjects.txt") as subjects_file:
    all_subjects = subjects_file.read()
In [68]:
[item[0] for item in re.findall (r"(\d{3})-(\d{3})-(\d{4})", all_subjects)]
Out[68]:
In [70]:
re.findall(r"\$(\d+) ?(\w+)", all_subjects)
Out[70]:
In [73]:
# Total every dollar amount that is followed by a k/m/b magnitude suffix.
vals = []
for item in re.findall(r"\$(\d+) ?([mMbBkK])", all_subjects):
    # item is (digits, suffix); the suffix is lower-cased, so compare against
    # lower-case letters only.
    multiplier = item[1].lower()
    number_val = int(item[0])
    if multiplier == 'k':
        number_val *= 1000
    elif multiplier == 'm':
        number_val *= 1000000
    elif multiplier == 'b':
        number_val *= 1000000000
    vals.append(number_val)
sum(vals)
Out[73]:
In [74]:
message = "this is a test, this is only a test"
In [76]:
# Chain two literal replacements: "this" -> "that", then "test" -> "walrus".
message.replace("this", "that").replace("test", "walrus")
Out[76]:
In [78]:
message = "This is a test, this is only a test"
re.sub(r"[Tt]his", "that", message)
Out[78]:
In [80]:
re.sub(r"\b\w+\b", "PIKACHU", message)
Out[80]:
In [85]:
anon = re.sub(r"(\d{3})-(\d{3})-(\d{4})", r"\1-\2-XXXX", all_subjects)
In [89]:
re.findall(r"\d{3}-\d{3}-X{4}.{,20}", anon)
Out[89]:
In [111]:
from urllib.request import urlretrieve
urlretrieve("https://raw.githubusercontent.com/ledeprogram/data-and-databases/master/menupages-morningside-heights.html", "menu.html")
Out[111]:
In [99]:
# store:
#   * restaurant name
#   * price ($$$$$)
#   * cuisines
# every restaurant has a `<tr>` that is a child of the `<table>` tag with class `search-results`
# restaurant names are in a `<td>` tag with class `name-address`
# restaurant names are in an `<a>` tag inside that `<td>`
# restaurant price is in a `<span>` inside a `<td>` with class `price`
# the cuisine of the restaurant is in a `<td>` tag with no class, the fifth `<td>` tag that is a child of the restaurant's `<tr>`
# target:
#   * list of dictionaries
# [
#     {'name': "Brads", 'price': 1, 'cuisines': ['coffee']},
#     {'name': "Cafe Nana", 'price': ...},
# ]
In [100]:
# syntax: urlretrieve(url, filename)
In [113]:
from bs4 import BeautifulSoup
In [120]:
# Load the saved page (the with-block closes the file after reading), then parse it.
with open("menu.html") as html_file:
    raw_html = html_file.read()
soup = BeautifulSoup(raw_html, "html.parser")
In [121]:
#Just the names
In [122]:
# Walk every row of the search-results table and print the restaurant name.
search_table = soup.find("table", {'class': 'search-results'})
table_body = search_table.find('tbody')
for tr_tag in table_body.find_all('tr'):
    # The name is the text of the <a> inside the <td class="name-address"> cell.
    name_address_tag = tr_tag.find('td', {'class': 'name-address'})
    a_tag = name_address_tag.find('a')
    print(a_tag.string)
In [ ]:
#how about names and prices? and maybe the cuisine too
In [ ]:
In [123]:
# Walk every row of the search-results table and print its name and price level.
search_table = soup.find("table", {'class': 'search-results'})
table_body = search_table.find('tbody')
for tr_tag in table_body.find_all('tr'):
    # get restaurant name from the <a> inside a td
    # restaurant_name = get_name(tr_tag)
    name_address_tag = tr_tag.find('td', {'class': 'name-address'})
    a_tag = name_address_tag.find('a')
    restaurant_name = a_tag.string
    # get the price from the span if present
    price_tag = tr_tag.find('td', {'class': 'price'})
    price_span_tag = price_tag.find('span')
    if price_span_tag:
        # Count the '$' signs so price is an int in both branches.
        price = (price_span_tag.string or "").count("$")
    else:
        price = 0
    print(restaurant_name, price)
In [ ]:
# better-organized code using functions
In [124]:
def get_name(tr_tag):
    """Return the restaurant name held in one search-result row.

    tr_tag: a row element exposing ``find`` (a BeautifulSoup ``<tr>`` tag in
    this notebook). The name is the ``.string`` of the ``<a>`` inside the
    ``<td class="name-address">`` cell.
    """
    name_address_tag = tr_tag.find('td', {'class': 'name-address'})
    a_tag = name_address_tag.find('a')
    return a_tag.string
def get_price(tr_tag):
    """Return the price level of one search-result row as an int.

    tr_tag: a row element exposing ``find`` (a BeautifulSoup ``<tr>`` tag in
    this notebook). The price is the number of ``$`` characters in the
    ``<span>`` inside the ``<td class="price">`` cell. Returns 0 when the
    cell or the span is missing, so the result is always an int.
    """
    price_tag = tr_tag.find('td', {'class': 'price'})
    if price_tag is None:
        return 0
    price_span_tag = price_tag.find('span')
    if price_span_tag is None:
        return 0
    # .string may be None (e.g. nested markup); treat that as "no price".
    return (price_span_tag.string or "").count("$")
def get_cuisines(tr_tag):
    """Return the cuisines of one search-result row as a list of strings.

    tr_tag: a row element exposing ``find_all`` (a BeautifulSoup ``<tr>`` tag
    in this notebook). The cuisines are the comma-separated text of the
    fifth ``<td>``. Returns ``[]`` when the row has fewer than five cells or
    the cell has no text.
    """
    all_td_tags = tr_tag.find_all('td')
    if len(all_td_tags) < 5:
        return []
    # Keep the text as a str: it is split below, not treated as a number.
    cuisines = all_td_tags[4].string
    if cuisines:
        return cuisines.split(", ")
    return []
In [125]:
# Build one {'name', 'price', 'cuisines'} dict per row of the search-results table.
restaurants = []
search_table = soup.find("table", {'class': 'search-results'})
table_body = search_table.find('tbody')
for tr_tag in table_body.find_all('tr'):
    restaurant_name = get_name(tr_tag)
    price = get_price(tr_tag)
    cuisines = get_cuisines(tr_tag)
    rest_dict = {'name': restaurant_name, 'price': price, 'cuisines': cuisines}
    restaurants.append(rest_dict)
restaurants
In [126]:
# we want a list of str, with get_cuisines as a function
In [127]:
import
In [ ]: